# Scrape the details of every product in a JD.com (Jingdong Mall) shopping cart.
import requests
import urllib.request
import ssl
from pyquery import PyQuery

def _strip_label(text, label):
    """Return ``text`` without a leading ``label`` and the colon after it.

    ``str.lstrip(chars)`` treats its argument as a *set of characters*, so
    using it to drop a label also eats any value that happens to begin with
    one of the label's characters.  This helper removes the label only when
    it is really present as a prefix.

    Args:
        text: Raw text scraped from a property cell.
        label: The label expected at the start of ``text`` (without colon).

    Returns:
        The value part with surrounding label, colon and adjacent whitespace
        removed, or ``text`` unchanged when it does not start with ``label``.
    """
    rest = text.lstrip()
    if not rest.startswith(label):
        # Label absent: leave the text exactly as scraped.
        return text
    rest = rest[len(label):].lstrip()
    # The page uses a full-width colon; an ASCII one is tolerated as well.
    if rest[:1] in ("：", ":"):
        rest = rest[1:].lstrip()
    return rest


# Parse the shopping-cart page and extract the product information.
def parseCartHtml(html):
    """Yield one dict per product found in the cart page.

    Args:
        html: Markup of the cart page, as accepted by ``PyQuery``.

    Yields:
        A dict with the keys "序号" (index, always the placeholder 0 here),
        "商品名称" (name), "颜色" (colour), "尺码" (size), "价格" (price),
        "数量" (quantity) and "图片" (image URL).  Colour, size and price are
        empty strings when the matching element is missing; the image and
        quantity are whatever ``attr()`` returns for the selected elements.
    """
    doc = PyQuery(html)
    # Every product row lives in a <div class="item-form"> element.
    productlist = doc("div.item-form")
    for product in productlist.items():
        image = product.find("div.p-goods div.p-img img").attr("src")
        name = product.find("div.p-goods div.item-msg div.p-name a").text()

        color = ""
        colors = product.find("div.p-props div:first-child.props-txt")
        if len(colors) > 0:
            # Drop only the "颜色：" prefix, never characters of the value.
            color = _strip_label(colors.text(), "颜色")

        size = ""
        sizes = product.find("div.p-props div:nth-child(2).props-txt")
        if len(sizes) > 0:
            # Drop only the "尺码 ：" prefix (with or without the space).
            size = _strip_label(sizes.text(), "尺码")

        price = ""
        prices = product.find("div.p-price p.plus-switch strong")
        if len(prices) > 0:
            price = prices.text()

        pcount = product.find(
            "div.p-quantity div.quantity-form input.itxt"
        ).attr("value")

        yield {
            "序号": 0,
            "商品名称": name,
            "颜色": color,
            "尺码": size,
            "价格": price,
            "数量": pcount,
            "图片": image,
        }

# Fetch the shopping-cart page.
def getJDcartPage(timeout=30):
    """Download the JD shopping-cart page and return it as text.

    Args:
        timeout: Seconds to wait on blocking network operations before
            giving up, passed straight to ``urllib.request.urlopen``.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    url = "https://cart.jd.com/cart.action"
    # NOTE(review): the Cookie below is a hard-coded browser session; it
    # presumably expires and should come from configuration, not source.
    header = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        # "Accept-Encoding" is deliberately not sent — presumably so the body
        # arrives uncompressed and can be decoded directly below.
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Connection": "close",
        "Cookie": "user-key=54dbd158-2a63-4029-a793-39bbe54ac96c; shshshfpa=eb9be2ac-c11c-18eb-e4d0-b820327d69a1-1545630718; __jda=122270672.1545630718293321595093.1545630718.1545630718.1545630718.1; __jdc=122270672; __jdv=122270672|direct|-|none|-|1545630718294; __jdu=1545630718293321595093; PCSYCityID=1; ipLoc-djd=1-72-4137-0; areaId=1; cart-main=xx; shshshfpb=eC2cTjRkiLK2BbwxItfcZNA%3D%3D; shshshfp=a2b1c3a22d8340fa822b2b4cc33e0f40; _gcl_au=1.1.191327579.1545630911; 3AB9D23F7A4B3C9B=YLUADSGKHKV4AY2TQIGEEFXF2MZUNGU25YQYECPPEELQ7GBHWDMB7X6KESQL6MKSXLF7I7MHINA34JTUHJAE7PJD6E; cd=0; shshshsID=68c1c65d466b6407915020638d853403_9_1545630951059; __jdb=122270672.21.1545630718293321595093|1.1545630718; cn=7",
        "Host": "cart.jd.com",
        "Referer": "https://cart.jd.com/cart?rd=0.32676008302633996",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0;WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
    }
    request = urllib.request.Request(url, headers=header)
    # NOTE(review): certificate verification is switched off here via a
    # private ssl helper, which exposes the request (and the cookie) to
    # man-in-the-middle attacks. Kept to preserve existing behaviour.
    context = ssl._create_unverified_context()
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request, timeout=timeout, context=context) as res:
        html = res.read().decode("utf8")
    return html

# Program entry point.
def main():
    """Fetch the cart page, parse it and print every product it contains.

    Each product dict gets its "序号" entry set to its 1-based position
    before being printed.  Nothing is printed when the page is empty.
    """
    # Step 1: download the shopping-cart page.
    page = getJDcartPage()
    if not page:
        return

    # Step 2: parse the page and print the numbered product list.
    for index, item in enumerate(parseCartHtml(page), start=1):
        item["序号"] = index
        print(item)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
